notebook.community

Edit and run



In [3]:

    
from skbio.core.alignment import local_pairwise_align_ssw

# Locally align a short sequence (s1) against a longer one (s2) that
# contains a near-identical region, using skbio's SSW-based aligner.
s1 = "ACTAAGGCTCTCTACCCCTCTCAGAGA"
s2 = "AAAAAACTCTCTAAACTCACTAAGGCTCTCTACCCCTCTTCAGAGAAGTCGA"
r = local_pairwise_align_ssw(s1, s2)

# Function-call form of print: valid on Python 3 and, for a single
# argument, prints the same text on Python 2. Also matches the
# print(...) style used by the other cells in this notebook.
print(r)









    



>query
ACTAAGGCTCTCTACCCCTC-TCAGAGA
>target
ACTAAGGCTCTCTACCCCTCTTCAGAGA



In [1]:

    
from skbio.core.alignment.pairwise import local_pairwise_align_nucleotide
# Same pair of sequences as the SSW cell, aligned here with skbio's
# Python implementation of Smith-Waterman (it emits an EfficiencyWarning
# when called, pointing at local_pairwise_align_ssw as the fast option).
s1 = "ACTAAGGCTCTCTACCCCTCTCAGAGA"
s2 = "AAAAAACTCTCTAAACTCACTAAGGCTCTCTACCCCTCTTCAGAGAAGTCGA"
r = local_pairwise_align_nucleotide(s1, s2)
# Print the alignment as FASTA text so it can be compared with the SSW result.
print(r.to_fasta())









    



>0
ACTAAGGCTCTCTACCCCTC-TCAGAGA
>1
ACTAAGGCTCTCTACCCCTCTTCAGAGA







    



/Users/caporaso/code/skbio/skbio/core/alignment/pairwise.py:300: EfficiencyWarning: You're using skbio's python implementation of Smith-Waterman alignment. This will be very slow (e.g., thousands of times slower) than skbio.core.alignment.local_pairwise_align_ssw.
  EfficiencyWarning)



In [19]:

    
from skbio.parse.sequences import parse_fasta
from skbio import SequenceCollection, DNA
from random import choice

# NOTE(review): machine-specific absolute path; point this at your own copy
# of the Greengenes 13_8 73% OTU representative set before re-running.
gg_path = "/Users/caporaso/data/gg_13_8_otus/rep_set/73_otus.fasta"

# Build a DNA SequenceCollection from the FASTA records, skipping every
# record whose sequence contains an 'N'.
# The comprehension variables are deliberately NOT named `s`: the original
# reused `s` for both the per-record sequence and the resulting collection,
# which is confusing and, on Python 2, leaks the loop variable into the
# notebook namespace. `s` is kept as the result name because the timing
# cells below draw random sequences from it via choice(s).
s = SequenceCollection.from_fasta_records(
    [(seq_id, seq) for seq_id, seq in parse_fasta(gg_path) if 'N' not in seq],
    DNA)



In [21]:

    
# Time the SSW-based local aligner on randomly drawn sequence pairs from `s`.
# choice(s) is evaluated inside the timed statement, so every loop aligns a
# different random pair and the measurement includes the cost of both draws.
%timeit local_pairwise_align_ssw(choice(s), choice(s), gap_open_penalty=5, gap_extend_penalty=2, match_score=2, mismatch_score=-3)









    



100 loops, best of 3: 4.88 ms per loop



In [22]:

    
# Time the Python Smith-Waterman implementation with the same scoring
# parameters as the SSW timing cell. As there, choice(s) runs inside the timed
# statement, so the sequence pairs are random and differ from the SSW run.
%timeit local_pairwise_align_nucleotide(choice(s), choice(s), gap_open_penalty=5, gap_extend_penalty=2, match_score=2, mismatch_score=-3)









    



1 loops, best of 3: 19.7 s per loop



In [23]:

    
# Speed-up factor: best Python time per loop (19.7 s) divided by the best
# SSW time per loop (4.88 ms = 0.00488 s), both taken from the %timeit output.
19.7 / 0.00488









    Out[23]:





4036.8852459016393

So SSW is ~4000x faster than skbio's pure-Python implementation for local alignment. Awesome!